library(tidyverse)
library(lubridate)
library(plotly)
library(skimr)
theme_set(theme_minimal())

Data Preparation

# Load the training set and normalise the service-date column to Date
# so chronological ordering and lubridate helpers work on it.
df_case <- read.csv("../data/case_cost_prediction_train.csv")
df_case <- df_case %>% 
  mutate(tglpelayanan = as.Date(tglpelayanan))
df_case <- df_case %>% 
  arrange(kddati2, tkp, tglpelayanan)

# Validation data: same parsing and ordering as the training set.
df_case_val <- read.csv("../data/case_cost_prediction_val.csv")
df_case_val <- df_case_val %>% 
  mutate(tglpelayanan = as.Date(tglpelayanan))
df_case_val <- df_case_val %>% 
  arrange(kddati2, tkp, tglpelayanan)

# Combine train + validation into one frame, tagged via `cat`, so that
# lag/lead features can later be built across the train/test boundary.
df_case_all <- bind_rows(
  mutate(df_case, cat = "Train"),
  mutate(df_case_val, cat = "Test")
) %>% 
  arrange(kddati2, tkp, tglpelayanan)

# Small feature engineering: calendar parts, plus `comb`, a string key
# concatenating all indicator columns (a..sd) for grouping.
df_case <- df_case %>% 
  mutate(m = month(tglpelayanan),
         y = year(tglpelayanan)) %>% 
  mutate(comb = paste0(a, b, c, cb, d, ds, gd, hd, i1, i2, i3, i4, kb, kc, kg, ki, kj, kk, kl, km, ko, kp, kt, ku, 
                       s, sa, sb, sc, sd))

# Monthly aggregates per service date and service level (tkp): case
# volume, total/average cost, and per-district (kddati2) rates.
# `.groups = "drop"` returns an ungrouped frame — the original left the
# result grouped by tglpelayanan, which can surprise later verbs and
# emits a "summarise() has grouped output" message.
df_monthly <- df_case %>% 
  group_by(tglpelayanan, tkp) %>% 
  summarise(total_case = sum(case),
            total_cost = sum(unit_cost * case),
            avg_cost = total_cost / total_case,
            kd = n_distinct(kddati2),
            case_per_kd = total_case / kd,
            cost_per_kd = total_cost / kd,
            .groups = "drop")


# KD level
# District-level (kddati2) aggregates per date, service level and
# indicator combination. NOTE: summarise() leaves the result grouped
# by kddati2/tglpelayanan/tkp.
df_kd <- df_case %>% 
  group_by(kddati2, tglpelayanan, tkp, comb) %>% 
  summarise(cnt = n(),
            peserta = sum(peserta),
            case = sum(case),
            case_ratio = case / peserta,
            unit_cost = sum(unit_cost),
            cost_per_case = unit_cost / case)
# Row-wise sum of the indicator columns; `+` is already vectorised so
# rowwise() is unnecessary (hence the commented-out call).
df_case_all <- df_case_all %>% 
  # rowwise() %>% 
  mutate(comb_sum = a+b+c+cb+d+ds+gd+hd+i1+i2+i3+i4+kb+kc+kg+ki+kj+kk+kl+km+ko+kp+kt+ku+s+sa+sb+sc+sd)

Data Exploration

Checking

Validation

# Row counts per district in the validation set.
df_case_val %>% 
  count(kddati2)

(kddati2, tkp) pairs present in training but missing from the validation set

# anti_join() keeps train (kddati2, tkp) pairs with no match in the
# validation counts; with no `by` it joins on all shared columns.
df_case %>% 
  count(kddati2, tkp) %>% 
  rename(n_train = n) %>% 
  anti_join(
    df_case_val %>% 
      count(kddati2, tkp) %>% 
      rename(n_test = n) 
  )
# Sanity check: each (kddati2, tglpelayanan, peserta) triple is expected
# exactly twice — presumably once per tkp level (30/40); verify.
df_case_all %>% 
  count(kddati2, tglpelayanan, peserta) %>% 
  filter(n != 2)

General

How many dates?

# Number of rows (series observations) per service date.
p <- df_case %>%
  count(tglpelayanan) %>% 
  ggplot(aes(tglpelayanan, n)) +
  geom_line()
ggplotly(p)

How many cases per date?

# Monthly total cases, split by service level (tkp 30 vs 40).
p <- df_monthly %>% 
  ggplot(aes(tglpelayanan, total_case, color = factor(tkp))) +
  geom_line()
ggplotly(p)

Unit cost?

# Average cost per case over time, one plot per tkp level; dates from
# June 2021 onward are excluded.
p <- df_monthly %>% 
  filter(tkp == 30, tglpelayanan < ymd(20210601)) %>% 
  ggplot(aes(tglpelayanan, avg_cost, color = factor(tkp))) +
  geom_line()
ggplotly(p)
p <- df_monthly %>% 
  filter(tkp == 40, tglpelayanan < ymd(20210601)) %>% 
  ggplot(aes(tglpelayanan, avg_cost, color = factor(tkp))) +
  geom_line()
ggplotly(p)

Train-Test Distribution

# Tile map of data availability: which (district, month) cells come
# from Train vs Test, restricted to tkp == 30.
df_case %>% 
  filter(tkp == 30) %>% 
  select(row_id, tglpelayanan, kddati2) %>% 
  mutate(cat = "Train") %>% 
  bind_rows(
    df_case_val %>% 
      filter(tkp == 30) %>% 
      select(row_id, tglpelayanan, kddati2) %>% 
      mutate(cat = "Test")
  ) %>% 
  mutate(kddati2 = as.factor(kddati2)) %>%
  ggplot(aes(kddati2, tglpelayanan, fill = cat)) +
  coord_flip(expand = c(0,0)) +
  geom_tile() +
  theme_minimal()

# Row counts per month, split by Train/Test (tkp == 30 only).
df_case_all %>% 
  filter(tkp == 30) %>%
  group_by(tglpelayanan, cat) %>% 
  summarise(cnt = n()) %>% 
  ggplot(aes(tglpelayanan, cnt, fill = cat)) +
  geom_col() +
  theme_minimal()

Monthly Seasonality

df_case

Correlation (All)

# Correlation of case / unit_cost against the indicator columns and
# their one-step lags/leads, computed within each (kddati2, tkp)
# series. Lags/leads are built on the combined frame so train rows can
# borrow neighbouring test-period values; only Train rows are then
# correlated. Uses across(), replacing the deprecated
# mutate_at()/funs() pair.
cor_df <- df_case_all %>% 
  group_by(kddati2, tkp) %>% 
  mutate(across(c(a, b, c, cb, d, ds, gd, hd, i1, i2, i3, i4, kb, kc, kg, ki, kj, kk, kl, km, ko, kp, kt, ku,
                  s, sa, sb, sc, sd, comb_sum, case, unit_cost),
                list(lag = lag, lead = lead))) %>%
  ungroup() %>% 
  # drop the raw ds column (its lag/lead variants are kept)
  select(-ds) %>% 
  filter(cat == "Train") %>%
  select_if(is.numeric) %>% 
  corrr::correlate() %>% 
  corrr::focus(case, unit_cost) %>% 
  arrange(desc(abs(case)))

cor_df
# Same table, ordered by correlation with unit_cost instead of case.
cor_df %>% 
  arrange(desc(abs(unit_cost)))

Correlation (Peserta, Case)

# Pairwise correlation of the raw peserta/case/unit_cost columns.
df_case %>% 
  select(peserta, case, unit_cost) %>% 
  corrr::correlate()

# Wide layout: one row per (district, date), with peserta/case/unit_cost
# split by tkp (30/40), plus 1-3 step lags and leads. Uses across()
# (mutate_at()/funs() are deprecated). NOTE: lags/leads are computed on
# the ungrouped frame ordered by (kddati2, tglpelayanan), so values
# bleed across district boundaries at the first/last rows of a series.
df_case_wider <- df_case %>% 
  select(tglpelayanan, kddati2, tkp, peserta, case, unit_cost) %>% 
  pivot_wider(id_cols = c(tglpelayanan, kddati2), 
              names_from = tkp, values_from = c(peserta, case, unit_cost)) %>% 
  arrange(kddati2, tglpelayanan) %>% 
  # peserta is identical across tkp levels (checked below), so collapse
  mutate(peserta = coalesce(peserta_30, peserta_40)) %>% 
  mutate(across(c(case_30, case_40, unit_cost_30, unit_cost_40, peserta),
                list(lag1 = ~lag(.x, 1),
                     lag2 = ~lag(.x, 2),
                     lag3 = ~lag(.x, 3),
                     lead1 = ~lead(.x, 1),
                     lead2 = ~lead(.x, 2),
                     lead3 = ~lead(.x, 3))))

# peserta_30 and peserta_40 agree wherever both exist (kept as a
# reference check):
# df_case_wider %>% 
#   filter(!is.na(peserta_30), !is.na(peserta_40)) %>% 
#   filter(peserta_30 != peserta_40) %>% 
#   View()

# Correlations on complete rows only (20,206 rows survive drop_na)
df_case_wider %>% 
  select(peserta, case_30, case_40, unit_cost_30, unit_cost_40) %>% 
  drop_na() %>% 
  corrr::correlate()
# With lag1 features added (10,802 complete rows)
df_case_wider %>% 
  select(peserta, case_30, case_40, unit_cost_30, unit_cost_40, contains("lag1")) %>% 
  drop_na() %>% 
  corrr::correlate() %>% 
  corrr::stretch(na.rm = TRUE, remove.dups = TRUE) %>% 
  arrange(desc(r))
# With lead1 features (10,802 complete rows); keep only pairs that
# involve a current-period target column
df_case_wider %>% 
  select(peserta, case_30, case_40, unit_cost_30, unit_cost_40, contains("lead1")) %>% 
  drop_na() %>% 
  corrr::correlate() %>% 
  corrr::stretch(na.rm = TRUE, remove.dups = TRUE) %>% 
  filter(x %in% c("case_30", "case_40", "unit_cost_30", "unit_cost_40") | 
           y %in% c("case_30", "case_40", "unit_cost_30", "unit_cost_40")) %>% 
  arrange(desc(r))
# Both lag1 and lead1 (5,755 complete rows)
df_case_wider %>% 
  select(peserta, case_30, case_40, unit_cost_30, unit_cost_40, contains("lead1"), contains("lag1")) %>% 
  drop_na() %>% 
  corrr::correlate() %>% 
  corrr::stretch(na.rm = TRUE, remove.dups = TRUE) %>% 
  filter(x %in% c("case_30", "case_40", "unit_cost_30", "unit_cost_40") | 
           y %in% c("case_30", "case_40", "unit_cost_30", "unit_cost_40")) %>% 
  arrange(desc(r))

Outlier

# Outlier flags per (district, tkp) series: a point is flagged when it
# lies more than 2 SD from the series mean. (Motivated by rows with
# MAPE >= 200%.)
df_case_all <- df_case_all %>% 
  group_by(kddati2, tkp) %>% 
  mutate(case_mean = mean(case, na.rm = TRUE),
         case_sd = sd(case, na.rm = TRUE)) %>% 
  ungroup() %>% 
  mutate(is_outlier = (case >= case_mean + 2 * case_sd) | (case <= case_mean - 2 * case_sd),
         is_lower_outlier = (case <= case_mean - 2 * case_sd),
         is_upper_outlier = (case >= case_mean + 2 * case_sd))

# Share of lower/upper outliers among rows with a known case value.
df_case_all %>% 
  filter(!is.na(is_lower_outlier)) %>% 
  count(is_lower_outlier, is_upper_outlier) %>% 
  mutate(pct = n / sum(n) * 100)
df_case_all %>% 
  filter(!is.na(is_lower_outlier)) %>% 
  group_by(tglpelayanan, is_outlier) %>% 
  summarise(cnt = n()) %>% 
  ggplot(aes(tglpelayanan, cnt, fill = is_outlier)) +
  geom_col() +
  theme_minimal()

# Same flags recomputed on peserta. NOTE: this silently overwrites the
# case-based is_outlier / is_lower_outlier / is_upper_outlier columns.
df_case_all <- df_case_all %>% 
  group_by(kddati2, tkp) %>% 
  mutate(p_mean = mean(peserta, na.rm = TRUE),
         p_sd = sd(peserta, na.rm = TRUE)) %>% 
  ungroup() %>% 
  mutate(is_outlier = (peserta >= p_mean + 2 * p_sd) | (peserta <= p_mean - 2 * p_sd),
         is_lower_outlier = (peserta <= p_mean - 2 * p_sd),
         is_upper_outlier = (peserta >= p_mean + 2 * p_sd))

df_case_all %>% 
  count(cat, is_lower_outlier, is_upper_outlier) %>% 
  group_by(cat) %>% 
  mutate(pct = n / sum(n) * 100)

Visualize for Each KD

# Plot three stacked panels for one district: mean peserta per date,
# and the case series for tkp 30 and tkp 40.
#
# @param id A kddati2 district code.
# @return A ggplot object (3 facets, free y scales).
viz_kd <- function(id) {
  res <- df_case_all %>% 
    filter(kddati2 == id)
  res %>% 
    group_by(tglpelayanan, metrics = "peserta") %>% 
    summarise(val = mean(peserta, na.rm = TRUE)) %>% 
    ungroup() %>% 
    bind_rows(
      res %>% 
        filter(tkp == 30) %>% 
        transmute(tglpelayanan, metrics = "case_30", val = case, cat),
      res %>% 
        filter(tkp == 40) %>% 
        transmute(tglpelayanan, metrics = "case_40", val = case, cat)
    ) %>% 
    # geom_line() draws with the colour aesthetic; the original mapped
    # `fill`, which lines ignore, so every panel drew in the default
    # colour — map color instead.
    ggplot(aes(tglpelayanan, val, color = metrics)) +
    geom_line() +
    facet_wrap(~metrics, scales = "free_y", nrow = 3) +
    labs(subtitle = id)
}
viz_kd(300)

viz_kd(50)

Baseline

# Median-per-district baseline. NOTE: this df_pred is overwritten by
# the random-noise baseline below before it is ever scored.
df_pred <- df_case %>% 
  group_by(kddati2) %>% 
  mutate(predict_case = median(case),
         predict_unit_cost = median(unit_cost)) %>% 
  ungroup() %>% 
  select(row_id, case, predict_case, unit_cost, predict_unit_cost)

# "Oracle + noise" baseline: multiply the true targets by random
# factors to see what score a near-perfect prediction would earn.
set.seed(100)
case_random <- runif(nrow(df_case), 0.85, 1.15)
set.seed(1000)
unit_cost_random <- runif(nrow(df_case), 0.95, 1.07)
df_pred <- df_case %>% 
  mutate(case_random_ = case_random,
         unit_cost_random_ = unit_cost_random) %>% 
  mutate(predict_case = case * case_random_,
         # use the column (as done for case) rather than the global
         # vector `unit_cost_random`; values are identical but the
         # intent is consistent
         predict_unit_cost = unit_cost * unit_cost_random_) %>% 
  select(row_id, case, predict_case, unit_cost, predict_unit_cost)

# Mean absolute percentage error, returned as a fraction (not %).
# Undefined (Inf/NaN) when any y_actual is zero.
mape <- function(y_actual, y_pred) {
  rel_err <- (y_actual - y_pred) / y_actual
  mean(abs(rel_err))
}

# Mean absolute error, in the units of the target.
mae <- function(y_actual, y_pred) {
  abs_err <- abs(y_actual - y_pred)
  mean(abs_err)
}

# MAPE
cat("MAPE Cost", mape(df_pred$unit_cost, df_pred$predict_unit_cost), "\n")
## MAPE Cost 0.03080327
cat("MAPE Case", mape(df_pred$case, df_pred$predict_case), "\n")
## MAPE Case 0.07498841
# MAE (labels fixed: these print MAE, not MAPE — compare the correctly
# labelled block further below)
cat("MAE Cost", mae(df_pred$unit_cost, df_pred$predict_unit_cost), "\n")
## MAE Cost 60476.69
cat("MAE Case", mae(df_pred$case, df_pred$predict_case), "\n")
## MAE Case 489.5009

Baseline: average of the previous and next values

# Baseline: predict each point as the average of its previous and next
# observation within the (kddati2, tkp) series; coalesce() falls back
# to the single available neighbour at series edges, and rows with no
# neighbours at all are dropped.
df_pred <- df_case %>% 
  group_by(kddati2, tkp) %>% 
  mutate(predict_case = (coalesce(lag(case), lead(case)) + coalesce(lead(case), lag(case))) / 2,
         predict_unit_cost = (coalesce(lag(unit_cost), lead(unit_cost)) + coalesce(lead(unit_cost), lag(unit_cost))) / 2) %>% 
  ungroup() %>% 
  select(row_id, case, predict_case, unit_cost, predict_unit_cost) %>% 
  filter(!is.na(predict_case), !is.na(predict_unit_cost))

# MAPE
cat("MAPE Cost", mape(df_pred$unit_cost, df_pred$predict_unit_cost), "\n")
## MAPE Cost 0.04524511
cat("MAPE Case", mape(df_pred$case, df_pred$predict_case), "\n")
## MAPE Case 1.90452
# MAE
cat("MAE Cost", mae(df_pred$unit_cost, df_pred$predict_unit_cost), "\n")
## MAE Cost 77977.58
cat("MAE Case", mae(df_pred$case, df_pred$predict_case), "\n")
## MAE Case 641.8952

Lead

# Baseline: use the next period's value as the prediction, built on
# train+test combined so the last train month can borrow the first
# test month; fill() then carries the last prediction downward over
# remaining NAs. Scored on Train rows only.
df_pred <- df_case_all %>% 
  group_by(kddati2, tkp) %>% 
  mutate(predict_case = lead(case),
         predict_unit_cost = lead(unit_cost)) %>% 
  fill(predict_case, predict_unit_cost, .direction = c("down")) %>% 
  ungroup() %>% 
  select(tglpelayanan, row_id, case, predict_case, unit_cost, predict_unit_cost, cat) %>% 
  filter(!is.na(predict_case), !is.na(predict_unit_cost), cat == "Train")

# MAPE
nrow(df_pred)
## [1] 57783
cat("MAPE Cost", mape(df_pred$unit_cost, df_pred$predict_unit_cost), "\n")
## MAPE Cost 0.03648845
cat("MAPE Case", mape(df_pred$case, df_pred$predict_case), "\n")
## MAPE Case 0.1723142
# MAE
cat("MAE Cost", mae(df_pred$unit_cost, df_pred$predict_unit_cost), "\n")
## MAE Cost 60330.77
cat("MAE Case", mae(df_pred$case, df_pred$predict_case), "\n")
## MAE Case 497.4671

Use the test data too

# Same previous/next-average baseline, but built on train+test
# combined; scored on Train rows only.
df_pred2 <- df_case_all %>% 
  group_by(kddati2, tkp) %>% 
  mutate(predict_case = (coalesce(lag(case), lead(case)) + coalesce(lead(case), lag(case))) / 2,
         predict_unit_cost = (coalesce(lag(unit_cost), lead(unit_cost)) + coalesce(lead(unit_cost), lag(unit_cost))) / 2) %>% 
  ungroup() %>% 
  select(tglpelayanan, row_id, case, predict_case, unit_cost, predict_unit_cost, cat) %>% 
  filter(!is.na(predict_case), !is.na(predict_unit_cost), cat == "Train")

# MAPE
nrow(df_pred2)
## [1] 52528
cat("MAPE Cost", mape(df_pred2$unit_cost, df_pred2$predict_unit_cost), "\n")
## MAPE Cost 0.04660915
cat("MAPE Case", mape(df_pred2$case, df_pred2$predict_case), "\n")
## MAPE Case 1.667445
# MAE
cat("MAE Cost", mae(df_pred2$unit_cost, df_pred2$predict_unit_cost), "\n")
## MAE Cost 79258.34
cat("MAE Case", mae(df_pred2$case, df_pred2$predict_case), "\n")
## MAE Case 655.3304
# Predictions for the Test rows from the same neighbour-average scheme.
df_pred <- df_case_all %>% 
  group_by(kddati2, tkp) %>% 
  mutate(predict_case = (coalesce(lag(case), lead(case)) + coalesce(lead(case), lag(case))) / 2,
         predict_unit_cost = (coalesce(lag(unit_cost), lead(unit_cost)) + coalesce(lead(unit_cost), lag(unit_cost))) / 2) %>% 
  ungroup() %>% 
  select(row_id, case, predict_case, unit_cost, predict_unit_cost, cat) %>% 
  filter(cat == "Test")
# Attach per-row percentage errors from both frames. left_join() with
# no `by` joins on all shared columns (including row_id and cat);
# NOTE(review): the all-NA predict_case2 / pe2 columns in the summary
# below suggest no Test row matched a Train row — verify the intended
# join key.
df_pred_all <- df_pred %>% 
  mutate(pe = abs( (case - predict_case) / case ) ) %>% 
  left_join(
    df_pred2 %>% 
      mutate(pe2 = abs( (case - predict_case) / case ) ) %>% 
      rename(predict_case2 = predict_case, predict_unit_cost2 = predict_unit_cost) %>% 
      select(-case, -unit_cost)
  )
df_pred_all %>% 
  summary()
##      row_id           case        predict_case        unit_cost    
##  Min.   :    1   Min.   : NA     Min.   :     1.0   Min.   : NA    
##  1st Qu.: 6159   1st Qu.: NA     1st Qu.:   436.2   1st Qu.: NA    
##  Median :12317   Median : NA     Median :  1409.0   Median : NA    
##  Mean   :12317   Mean   :NaN     Mean   :  6485.8   Mean   :NaN    
##  3rd Qu.:18475   3rd Qu.: NA     3rd Qu.:  4574.0   3rd Qu.: NA    
##  Max.   :24633   Max.   : NA     Max.   :319056.0   Max.   : NA    
##                  NA's   :24633   NA's   :2326       NA's   :24633  
##  predict_unit_cost      cat                  pe         tglpelayanan  
##  Min.   :  115621   Length:24633       Min.   : NA     Min.   :NA     
##  1st Qu.:  234019   Class :character   1st Qu.: NA     1st Qu.:NA     
##  Median : 1689109   Mode  :character   Median : NA     Median :NA     
##  Mean   : 1975835                      Mean   :NaN     Mean   :NA     
##  3rd Qu.: 3544800                      3rd Qu.: NA     3rd Qu.:NA     
##  Max.   :26905500                      Max.   : NA     Max.   :NA     
##  NA's   :2326                          NA's   :24633   NA's   :24633  
##  predict_case2   predict_unit_cost2      pe2       
##  Min.   : NA     Min.   : NA        Min.   : NA    
##  1st Qu.: NA     1st Qu.: NA        1st Qu.: NA    
##  Median : NA     Median : NA        Median : NA    
##  Mean   :NaN     Mean   :NaN        Mean   :NaN    
##  3rd Qu.: NA     3rd Qu.: NA        3rd Qu.: NA    
##  Max.   : NA     Max.   : NA        Max.   : NA    
##  NA's   :24633   NA's   :24633      NA's   :24633

Baseline (Check Val)

Check data completeness

# Build lag/lead features on train+test combined to check, per split,
# what fraction of rows would have each feature available.
test_fe <- df_case_val %>% 
  select(row_id, tglpelayanan, kddati2, tkp, peserta) %>% 
  mutate(cat = "Test") %>% 
  bind_rows(
    df_case %>% 
    select(row_id, tglpelayanan, kddati2, tkp, peserta, case, unit_cost) %>% 
    mutate(cat = "Train")
  ) %>% 
  arrange(kddati2, tkp, tglpelayanan) %>% 
  group_by(kddati2, tkp) %>% 
  # across() replaces the deprecated mutate_at()/funs() pair; this also
  # removes the stray trailing comma after lead6 in the original
  # funs() call (an empty argument that only parsed by accident).
  mutate(across(c(case, peserta),
                list(lag1 = ~lag(.x, 1),
                     lag2 = ~lag(.x, 2),
                     lag3 = ~lag(.x, 3),
                     lag6 = ~lag(.x, 6),
                     lead1 = ~lead(.x, 1),
                     lead2 = ~lead(.x, 2),
                     lead3 = ~lead(.x, 3),
                     lead6 = ~lead(.x, 6)))) %>% 
  ungroup()
# Share of non-missing values per lag/lead feature and split.
test_fe %>% 
  group_by(cat) %>% 
  summarise_if(is.numeric, ~sum(ifelse(!is.na(.), 1, 0))) %>% 
  gather(key, val, -cat) %>% 
  group_by(cat) %>% 
  mutate(val_max = max(val)) %>% 
  filter(grepl("lag|lead", key)) %>% 
  mutate(val_pct = val / val_max)

Evaluation

Check submission results

# Load submission V1 and splice predictions into the combined frame:
# Test rows have NA case/unit_cost, so coalesce() takes the prediction.
submission <- read.csv("../submission/tahap2_case_cost_prediction.csv") %>% 
  mutate(cat = "Test")
df_case_final <- df_case_all %>% 
  left_join(submission) %>% 
  mutate(case = coalesce(case, predict_case),
         unit_cost = coalesce(unit_cost, predict_unit_cost))
# Distribution of the submitted predictions. Note the negative minimum
# predict_case (-114.7), which is impossible for a count.
df_case_final %>% 
  filter(cat == "Test") %>% 
  select(predict_case, predict_unit_cost) %>% 
  summary()
##   predict_case      predict_unit_cost 
##  Min.   :  -114.7   Min.   :  130130  
##  1st Qu.:   422.8   1st Qu.:  236190  
##  Median :  1386.0   Median : 1729663  
##  Mean   :  6319.7   Mean   : 1972586  
##  3rd Qu.:  4394.3   3rd Qu.: 3545424  
##  Max.   :172498.8   Max.   :11258508
# Redefinition of viz_kd over df_case_final, so plots include the
# predicted (Test) points alongside the training history.
#
# @param id A kddati2 district code.
# @return A ggplot object (3 facets, free y scales).
viz_kd <- function(id) {
  res <- df_case_final %>% 
    filter(kddati2 == id)
  res %>% 
    group_by(tglpelayanan, metrics = "peserta") %>% 
    summarise(val = mean(peserta, na.rm = TRUE)) %>% 
    ungroup() %>% 
    bind_rows(
      res %>% 
        filter(tkp == 30) %>% 
        transmute(tglpelayanan, metrics = "case_30", val = case, cat),
      res %>% 
        filter(tkp == 40) %>% 
        transmute(tglpelayanan, metrics = "case_40", val = case, cat)
    ) %>% 
    # geom_line() draws with the colour aesthetic; the original mapped
    # `fill`, which lines ignore — map color instead. Points keep
    # their own color = cat mapping to distinguish Train vs Test.
    ggplot(aes(tglpelayanan, val, color = metrics)) +
    geom_line() +
    geom_point(aes(tglpelayanan, val, color = cat)) +
    facet_wrap(~metrics, scales = "free_y", nrow = 3) +
    labs(subtitle = id)
}
viz_kd(5)

viz_kd(202)

# Same check for submission V2; predictions are now bounded below by 1
# (the negative-count problem from V1 is gone).
submission <- read.csv("../submission/tahap2_case_cost_prediction_V2.csv") %>% 
  mutate(cat = "Test")
df_case_final <- df_case_all %>% 
  left_join(submission) %>% 
  mutate(case = coalesce(case, predict_case),
         unit_cost = coalesce(unit_cost, predict_unit_cost))
df_case_final %>% 
  filter(cat == "Test") %>% 
  select(predict_case, predict_unit_cost) %>% 
  summary()
##   predict_case      predict_unit_cost 
##  Min.   :     1.0   Min.   :  115621  
##  1st Qu.:   421.3   1st Qu.:  234319  
##  Median :  1379.3   Median : 1700871  
##  Mean   :  6226.3   Mean   : 1972148  
##  3rd Qu.:  4440.6   3rd Qu.: 3544692  
##  Max.   :143554.5   Max.   :16992385
# Spot-check a few districts visually.
viz_kd(306)

viz_kd(300)

viz_kd(50)

viz_kd(5)


Session Info
sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] skimr_2.1.3      plotly_4.9.3     lubridate_1.7.10 forcats_0.5.1   
##  [5] stringr_1.4.0    dplyr_1.0.5      purrr_0.3.4      readr_1.4.0     
##  [9] tidyr_1.1.3      tibble_3.1.0     ggplot2_3.3.3    tidyverse_1.3.0 
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.6        assertthat_0.2.1  digest_0.6.27     utf8_1.2.1       
##  [5] R6_2.5.0          cellranger_1.1.0  repr_1.1.3        backports_1.2.1  
##  [9] reprex_2.0.0      evaluate_0.14     highr_0.8         httr_1.4.2       
## [13] pillar_1.5.1      rlang_0.4.10      lazyeval_0.2.2    readxl_1.3.1     
## [17] rstudioapi_0.13   data.table_1.14.0 rmarkdown_2.7     labeling_0.4.2   
## [21] htmlwidgets_1.5.3 munsell_0.5.0     broom_0.7.6       compiler_4.0.5   
## [25] modelr_0.1.8      xfun_0.22         pkgconfig_2.0.3   base64enc_0.1-3  
## [29] htmltools_0.5.1.1 tidyselect_1.1.0  corrr_0.4.3       fansi_0.4.2      
## [33] viridisLite_0.3.0 crayon_1.4.1      dbplyr_2.1.1      withr_2.4.1      
## [37] grid_4.0.5        jsonlite_1.7.2    gtable_0.3.0      lifecycle_1.0.0  
## [41] DBI_1.1.1         magrittr_2.0.1    scales_1.1.1      cli_2.4.0        
## [45] stringi_1.5.3     farver_2.1.0      fs_1.5.0          xml2_1.3.2       
## [49] ellipsis_0.3.1    generics_0.1.0    vctrs_0.3.7       tools_4.0.5      
## [53] glue_1.4.2        hms_1.0.0         crosstalk_1.1.1   yaml_2.2.1       
## [57] colorspace_2.0-0  rvest_1.0.0       knitr_1.31        haven_2.3.1